csc3007-cyan2

Author

ONG ZHEN YU BRANDON, GOH BOON CHUN, CHNG JUN JIE JEREMY, EISEN REINER BAGUILAT PERDIDO, MAGESHWARAN SO MUTHUSAMY

1 Importing of libraries

library(readr)
library(ggplot2)
library(e1071)
library(readxl)
library(gridExtra)
library(tidyverse)
library(countrycode)
library(sf)
library(tmap)
library(lintr)
library(plotly)
library(MASS) # for boxcox normalisation
library(rmapshaper)
library(mapview)
library(leaflet)
library(RColorBrewer)
library(DT)
library(GGally)
library(dplyr)
library(psych)

2 Selecting the indicators

#mortality_rate_attributed_to_unsafe_water
mortality_rate_unsafe_water <- read_csv("data/indicator_3.9.2.csv",show_col_types = FALSE)

#Proportion_of_population_using_safely_managed_drinking_water_services
proportion_of_safe_water<- read_csv("data/indicator_6.1.1.csv",show_col_types = FALSE)

#mortality_rate_unintentional_poisoning
mortality_rate_unintentional_poisoning <- read_csv("data/indicator_3.9.3.csv",show_col_types = FALSE)

#proportion-of-population-with-basic-handwashing-facilities-on-premises-by-urban-rural-percent
population_with_basic_handwashing_facilities<- read_csv("data/indicator_6.2.1.csv",show_col_types = FALSE)

3 Map of Indicator 1

3.1 Select specific columns for mortality rate unintentional poisoning

mortality_rate_unintentional_poisoning <- dplyr::select(
  mortality_rate_unintentional_poisoning,
  code = 'geoAreaCode',
  country = 'geoAreaName',
  region = 'parentName',
  iso = 'ISO3',
  gender = 'sex_desc',
  mortality_rate_unintentional_poisoning_2019 = 'value_2019'
)

# drop rows if contains at least one NA value
mortality_rate_unintentional_poisoning <- na.omit(mortality_rate_unintentional_poisoning)

#check for empty row
any(is.na(mortality_rate_unintentional_poisoning))
[1] FALSE
mortality_rate_unintentional_poisoning
# A tibble: 597 × 6
    code country     region                  iso   gender mortality_rate_unint…¹
   <dbl> <chr>       <chr>                   <chr> <chr>                   <dbl>
 1   470 Malta       Southern Europe         MLT   Both …                    0.1
 2   152 Chile       South America           CHL   Male                      0.5
 3     4 Afghanistan Southern Asia (excludi… AFG   Both …                    1  
 4   470 Malta       Southern Europe         MLT   Female                    0  
 5   156 China       Eastern Asia            CHN   Both …                    1.8
 6     4 Afghanistan Southern Asia (excludi… AFG   Both …                    1  
 7   470 Malta       Southern Europe         MLT   Male                      0.2
 8   156 China       Eastern Asia            CHN   Female                    1.5
 9   478 Mauritania  Western Africa          MRT   Both …                    1.5
10   478 Mauritania  Western Africa          MRT   Female                    1.3
# ℹ 587 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019

3.2 Histogram of mortality rate unintential poisoning for year 2019

# Create the histogram
mortality_histo <- ggplot(mortality_rate_unintentional_poisoning, aes(x=mortality_rate_unintentional_poisoning_2019)) +
  geom_histogram(binwidth=0.1, fill='blue', color='black') +
  labs(title="Mortality rate unintentional poisoning for Year 2019", x="Mortality Rate (per 100,000)", y="Number of Country")

mortality_histo

3.3 Select specific columns for population with basic handwashing facilities

population_with_basic_handwashing_facilities <- dplyr::select(
  population_with_basic_handwashing_facilities,
  code = 'geoAreaCode',
  country = 'geoAreaName',
  region = 'parentName',
  iso = 'ISO3',
  area = 'location_desc',
  pop_with_basic_handwashing_facilites_2019 = 'value_2019'
)

# drop rows if contains at least one NA value
population_with_basic_handwashing_facilities <- na.omit(population_with_basic_handwashing_facilities)

#check for empty row
any(is.na(mortality_rate_unintentional_poisoning))
[1] FALSE
population_with_basic_handwashing_facilities
# A tibble: 284 × 6
    code country      region                  iso   area  pop_with_basic_handw…¹
   <dbl> <chr>        <chr>                   <chr> <chr>                  <dbl>
 1     4 Afghanistan  Southern Asia (excludi… AFG   All …                     38
 2   356 India        Southern Asia           IND   Urban                     82
 3     4 Afghanistan  Southern Asia (excludi… AFG   All …                     38
 4   360 Indonesia    South-Eastern Asia      IDN   All …                     94
 5     4 Afghanistan  Southern Asia (excludi… AFG   Rural                     29
 6   694 Sierra Leone Western Africa          SLE   Rural                     18
 7   360 Indonesia    South-Eastern Asia      IDN   Rural                     91
 8     4 Afghanistan  Southern Asia (excludi… AFG   Rural                     29
 9     4 Afghanistan  Southern Asia (excludi… AFG   Urban                     64
10     4 Afghanistan  Southern Asia (excludi… AFG   Urban                     64
# ℹ 274 more rows
# ℹ abbreviated name: ¹​pop_with_basic_handwashing_facilites_2019

3.4 Histogram of population with basic handwashing facilities for year 2019

# Create the histogram
pop_with_handwashing_facilities_histo <- ggplot(population_with_basic_handwashing_facilities, aes(x=pop_with_basic_handwashing_facilites_2019)) +
  geom_histogram(binwidth=5, fill='blue', color='black') +
  labs(title="Population with basic handwashing facilities for Year 2019", x="Population (%)", y="Number of Country")

pop_with_handwashing_facilities_histo

3.5 Left join between mortality rate and basic handwashing

countries_tb <- left_join(
  mortality_rate_unintentional_poisoning,
  population_with_basic_handwashing_facilities,
  by = join_by(country, iso)
) |>
  mutate(
    iso = case_match(
      iso,
      "COD" ~ "ZAR",
      "ROU" ~ "ROM",
      "TLS" ~ "TMP",
      "XKX" ~ "KSV",
      .default = iso
    )
  )
Warning in left_join(mortality_rate_unintentional_poisoning, population_with_basic_handwashing_facilities, : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 3 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
countries_tb
# A tibble: 1,323 × 10
   code.x country   region.x iso   gender mortality_rate_unint…¹ code.y region.y
    <dbl> <chr>     <chr>    <chr> <chr>                   <dbl>  <dbl> <chr>   
 1    470 Malta     Souther… MLT   Both …                    0.1     NA <NA>    
 2    152 Chile     South A… CHL   Male                      0.5     NA <NA>    
 3      4 Afghanis… Souther… AFG   Both …                    1        4 Souther…
 4      4 Afghanis… Souther… AFG   Both …                    1        4 Souther…
 5      4 Afghanis… Souther… AFG   Both …                    1        4 Souther…
 6      4 Afghanis… Souther… AFG   Both …                    1        4 Souther…
 7      4 Afghanis… Souther… AFG   Both …                    1        4 Souther…
 8      4 Afghanis… Souther… AFG   Both …                    1        4 Souther…
 9    470 Malta     Souther… MLT   Female                    0       NA <NA>    
10    156 China     Eastern… CHN   Both …                    1.8     NA <NA>    
# ℹ 1,313 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: area <chr>,
#   pop_with_basic_handwashing_facilites_2019 <dbl>

3.6 Import map

countries_sf <- read_sf("./data/WB_countries_Admin0.geojson")
land_sf <- read_sf("./data/WB_Land.geojson")
countries_sf <-
  countries_sf |>
  ms_simplify() |> # Default argument `keep = 0.05`
  st_make_valid()
land_sf <- ms_simplify(land_sf)
npts(countries_sf)
[1] 30826
countries_sf <-
  countries_sf |>
  left_join(
    countries_tb,
    by = join_by(WB_A3 == iso)
  ) |>
  dplyr::select(country, continent = 'CONTINENT', gender = 'gender', area = 'area', iso = WB_A3, mortality_rate_unintentional_poisoning_2019, pop_with_basic_handwashing_facilites_2019)
countries_sf
Simple feature collection with 1246 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -180 ymin: -55.62754 xmax: 179.9038 ymax: 83.6341
Geodetic CRS:  WGS 84
# A tibble: 1,246 × 8
   country   continent gender     area      iso   mortality_rate_unintentional…¹
   <chr>     <chr>     <chr>      <chr>     <chr>                          <dbl>
 1 Indonesia Asia      Both sexes All areas IDN                              0.3
 2 Indonesia Asia      Both sexes Rural     IDN                              0.3
 3 Indonesia Asia      Both sexes Urban     IDN                              0.3
 4 Indonesia Asia      Female     All areas IDN                              0.1
 5 Indonesia Asia      Female     Rural     IDN                              0.1
 6 Indonesia Asia      Female     Urban     IDN                              0.1
 7 Indonesia Asia      Male       All areas IDN                              0.5
 8 Indonesia Asia      Male       Rural     IDN                              0.5
 9 Indonesia Asia      Male       Urban     IDN                              0.5
10 Malaysia  Asia      Both sexes <NA>      MYS                              0.7
# ℹ 1,236 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
polygon <- st_polygon(x = list(rbind(
  c(-180.0001, 90),
  c(-179.9999, 90),
  c(-179.9999, -90),
  c(-180.0001, -90),
  c(-180.0001, 90)
))) |>
  st_sfc() |>
  st_set_crs(4326) # Equirectangular projection
countries_sf <- mutate(
  countries_sf,
  geometry = st_difference(geometry, polygon)
) |>
  st_make_valid()
tm_shape(countries_sf) + tm_polygons()

3.7 Map of population with basic handwashing facilities for year 2019

countries_sf$area <- replace_na(countries_sf$area, "missing")
choropleth_pop <-
  tm_shape(land_sf, projection = "ESRI:54012") +
  tm_polygons(col = "grey") +
  tm_shape(countries_sf) +
  tm_polygons(
    col = "pop_with_basic_handwashing_facilites_2019",
    border.col = "grey30",
    palette = "Greens",
    breaks = c(-Inf, 20, 40, 60, 80, 100, Inf),
    colorNA = "grey",
    title = "Population (%)",
    lwd = 0.5
  ) +
  tm_text("iso", size = "AREA") +  # Use the new column to set the size
  tm_credits(
    c("", "Source: unstats-undesa.opendata.arcgis.com"),
    position = c("right", "bottom")
  ) +
  tm_layout(
    bg.color = "lightblue",
    frame = FALSE,
    inner.margins = c(0.00, 0.2, 0.2, 0.1),
    earth.boundary = TRUE,
    space.color = "white",
    main.title.size = 0.8,
    title.size = 0.5,
    main.title = "Population with basic handwashing facilities by Country for 2019"
  )

choropleth_pop

3.8 Map of mortality rate unintentional poisoning for year 2019

countries_sf$gender <- replace_na(countries_sf$gender, "missing")
choropleth_mor<-
  tm_shape(land_sf, projection = "ESRI:54012") +
  tm_polygons(col = "grey") +
  tm_shape(countries_sf) +
  tm_polygons(
    col = "mortality_rate_unintentional_poisoning_2019",
    border.col = "grey30",
    palette = "Oranges",
    breaks = c(-Inf, 0.2, 0.4, 0.6, 0.8, 1, Inf),
    colorNA = "grey",
    title = "Mortality Rate \nper 100,000",
    lwd = 0.5
  ) + 
  tm_text("iso", size = "AREA") +
  tm_credits(
    c("", "Source: unstats-undesa.opendata.arcgis.com"),
    position = c("right", "bottom")
  ) +
  tm_layout(
    bg.color = "lightblue",
    frame = FALSE,
    inner.margins = c(0.00, 0.2, 0.2, 0.1),
    earth.boundary = TRUE,
    space.color = "white",
    main.title.size = 0.8,
    title.size = 0.5,
    main.title = "Mortality Rate Unintentional Poisoning by Country for 2019"
  )

choropleth_mor

3.9 Filtering of areas

countries_sf_all_areas <- countries_sf[countries_sf$area == "All areas",]
countries_sf_urban <- countries_sf[countries_sf$area == "Urban",]
countries_sf_rural <- countries_sf[countries_sf$area == "Rural",]

countries_sf_all_areas
Simple feature collection with 324 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -172.7497 ymin: -34.80869 xmax: 166.1479 ymax: 55.38515
Geodetic CRS:  WGS 84
# A tibble: 324 × 8
   country                   continent gender area  iso   mortality_rate_unint…¹
   <chr>                     <chr>     <chr>  <chr> <chr>                  <dbl>
 1 Indonesia                 Asia      Both … All … IDN                      0.3
 2 Indonesia                 Asia      Female All … IDN                      0.1
 3 Indonesia                 Asia      Male   All … IDN                      0.5
 4 Bolivia (Plurinational S… South Am… Both … All … BOL                      0.6
 5 Bolivia (Plurinational S… South Am… Female All … BOL                      0.5
 6 Bolivia (Plurinational S… South Am… Male   All … BOL                      0.6
 7 India                     Asia      Both … All … IND                      0.3
 8 India                     Asia      Female All … IND                      0.2
 9 India                     Asia      Male   All … IND                      0.3
10 Ethiopia                  Africa    Both … All … ETH                      3.3
# ℹ 314 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
countries_sf_urban
Simple feature collection with 306 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -157.449 ymin: -34.80869 xmax: 166.1479 ymax: 55.38515
Geodetic CRS:  WGS 84
# A tibble: 306 × 8
   country                   continent gender area  iso   mortality_rate_unint…¹
   <chr>                     <chr>     <chr>  <chr> <chr>                  <dbl>
 1 Indonesia                 Asia      Both … Urban IDN                      0.3
 2 Indonesia                 Asia      Female Urban IDN                      0.1
 3 Indonesia                 Asia      Male   Urban IDN                      0.5
 4 Bolivia (Plurinational S… South Am… Both … Urban BOL                      0.6
 5 Bolivia (Plurinational S… South Am… Female Urban BOL                      0.5
 6 Bolivia (Plurinational S… South Am… Male   Urban BOL                      0.6
 7 India                     Asia      Both … Urban IND                      0.3
 8 India                     Asia      Female Urban IND                      0.2
 9 India                     Asia      Male   Urban IND                      0.3
10 Ethiopia                  Africa    Both … Urban ETH                      3.3
# ℹ 296 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
countries_sf_rural
Simple feature collection with 309 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -157.449 ymin: -34.80869 xmax: 166.1479 ymax: 55.38515
Geodetic CRS:  WGS 84
# A tibble: 309 × 8
   country                   continent gender area  iso   mortality_rate_unint…¹
   <chr>                     <chr>     <chr>  <chr> <chr>                  <dbl>
 1 Indonesia                 Asia      Both … Rural IDN                      0.3
 2 Indonesia                 Asia      Female Rural IDN                      0.1
 3 Indonesia                 Asia      Male   Rural IDN                      0.5
 4 Bolivia (Plurinational S… South Am… Both … Rural BOL                      0.6
 5 Bolivia (Plurinational S… South Am… Female Rural BOL                      0.5
 6 Bolivia (Plurinational S… South Am… Male   Rural BOL                      0.6
 7 Peru                      South Am… Both … Rural PER                      0.4
 8 Peru                      South Am… Female Rural PER                      0.3
 9 Peru                      South Am… Male   Rural PER                      0.5
10 India                     Asia      Both … Rural IND                      0.3
# ℹ 299 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
# Get a color palette with three colors
colors <- brewer.pal(3, "Set1")

# Assign colors to the categories
colorpal_area <- colorFactor(colors, 
                               levels = c("All areas", "Urban", "Rural"))

3.10 Map of population with basic handwashing facilities for all areas

leaflet(countries_sf_all_areas) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorpal_area(area),  
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~iso,  
              popup = ~paste("Country Code: ", iso,
                             "<br>Country: ", country,
                             "<br>Population with basic handwashing facilities (%): ", pop_with_basic_handwashing_facilites_2019,
                             "<br>Area: ", area)) %>%
  addLegend(pal = colorpal_area, 
            values = ~area, 
            title = "Area", 
            position = "bottomright")

3.11 Map of population with basic handwashing facilities for urban areas

leaflet(countries_sf_urban) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorpal_area(area),  # use colorpal_area here
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~iso,  # This adds labels that appear when you hover over a country
              popup = ~paste("Country Code: ", iso,
                             "<br>Country: ", country,
                             "<br>Population with basic handwashing facilities (%): ", pop_with_basic_handwashing_facilites_2019,
                             "<br>Area: ", area)) %>%
  addLegend(pal = colorpal_area, 
            values = ~area, 
            title = "Area", 
            position = "bottomright")

3.12 Map of population with basic handwashing facilities for rural areas

leaflet(countries_sf_rural) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorpal_area(area),  # use colorpal_area here
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~iso,  # This adds labels that appear when you hover over a country
              popup = ~paste("Country Code: ", iso,
                             "<br>Country: ", country,
                             "<br>Population with basic handwashing facilities (%): ", pop_with_basic_handwashing_facilites_2019,
                             "<br>Area: ", area)) %>%
  addLegend(pal = colorpal_area, 
            values = ~area, 
            title = "Area", 
            position = "bottomright")

3.13 Filtering of genders

countries_sf_male <- countries_sf[countries_sf$gender == "Male",]
countries_sf_female <- countries_sf[countries_sf$gender == "Female",]
countries_sf_both <- countries_sf[countries_sf$gender == "Both sexes",]

countries_sf_male
Simple feature collection with 410 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -179.9999 ymin: -55.62754 xmax: 179.9999 ymax: 83.11652
Geodetic CRS:  WGS 84
# A tibble: 410 × 8
   country                   continent gender area  iso   mortality_rate_unint…¹
   <chr>                     <chr>     <chr>  <chr> <chr>                  <dbl>
 1 Indonesia                 Asia      Male   All … IDN                      0.5
 2 Indonesia                 Asia      Male   Rural IDN                      0.5
 3 Indonesia                 Asia      Male   Urban IDN                      0.5
 4 Malaysia                  Asia      Male   miss… MYS                      1  
 5 Chile                     South Am… Male   miss… CHL                      0.5
 6 Bolivia (Plurinational S… South Am… Male   All … BOL                      0.6
 7 Bolivia (Plurinational S… South Am… Male   Rural BOL                      0.6
 8 Bolivia (Plurinational S… South Am… Male   Urban BOL                      0.6
 9 Peru                      South Am… Male   Rural PER                      0.5
10 Argentina                 South Am… Male   miss… ARG                      0.5
# ℹ 400 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
countries_sf_female
Simple feature collection with 410 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -179.9999 ymin: -55.62754 xmax: 179.9999 ymax: 83.11652
Geodetic CRS:  WGS 84
# A tibble: 410 × 8
   country                   continent gender area  iso   mortality_rate_unint…¹
   <chr>                     <chr>     <chr>  <chr> <chr>                  <dbl>
 1 Indonesia                 Asia      Female All … IDN                      0.1
 2 Indonesia                 Asia      Female Rural IDN                      0.1
 3 Indonesia                 Asia      Female Urban IDN                      0.1
 4 Malaysia                  Asia      Female miss… MYS                      0.3
 5 Chile                     South Am… Female miss… CHL                      0.2
 6 Bolivia (Plurinational S… South Am… Female All … BOL                      0.5
 7 Bolivia (Plurinational S… South Am… Female Rural BOL                      0.5
 8 Bolivia (Plurinational S… South Am… Female Urban BOL                      0.5
 9 Peru                      South Am… Female Rural PER                      0.3
10 Argentina                 South Am… Female miss… ARG                      0.3
# ℹ 400 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
countries_sf_both
Simple feature collection with 410 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -179.9999 ymin: -55.62754 xmax: 179.9999 ymax: 83.11652
Geodetic CRS:  WGS 84
# A tibble: 410 × 8
   country                   continent gender area  iso   mortality_rate_unint…¹
   <chr>                     <chr>     <chr>  <chr> <chr>                  <dbl>
 1 Indonesia                 Asia      Both … All … IDN                      0.3
 2 Indonesia                 Asia      Both … Rural IDN                      0.3
 3 Indonesia                 Asia      Both … Urban IDN                      0.3
 4 Malaysia                  Asia      Both … miss… MYS                      0.7
 5 Chile                     South Am… Both … miss… CHL                      0.4
 6 Bolivia (Plurinational S… South Am… Both … All … BOL                      0.6
 7 Bolivia (Plurinational S… South Am… Both … Rural BOL                      0.6
 8 Bolivia (Plurinational S… South Am… Both … Urban BOL                      0.6
 9 Peru                      South Am… Both … Rural PER                      0.4
10 Argentina                 South Am… Both … miss… ARG                      0.4
# ℹ 400 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>
# Get a color palette with three colors
colors <- brewer.pal(3, "Set1")

# Assign colors to the categories
colorpal_gender <- colorFactor(colors, 
                               levels = c("Female", "Male", "Both sexes"))

3.14 Map of mortality rate unintentional poisoning for male gender

# Replace NAs in 'gender' column
countries_sf_male$gender <- replace_na(countries_sf_male$gender, "missing")

leaflet(countries_sf_male) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorpal_gender(gender),  
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~iso,  
              popup = ~paste("Country Code: ", iso,
                             "<br>Country: ", country,
                             "<br>Mortality Rate Unintentional Poisoning per 100,000: ", mortality_rate_unintentional_poisoning_2019,
                             "<br>Gender: ", gender)) %>%
  addLegend(pal = colorpal_gender, 
            values = ~gender, 
            title = "Gender", 
            position = "bottomright")

3.15 Map of mortality rate unintentional poisoning for female gender

# Replace NAs in 'gender' column
countries_sf_female$gender <- replace_na(countries_sf_female$gender, "missing")

leaflet(countries_sf_female) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorpal_gender(gender),  
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~iso,  
              popup = ~paste("Country Code: ", iso,
                             "<br>Country: ", country,
                             "<br>Mortality Rate Unintentional Poisoning per 100,000: ", mortality_rate_unintentional_poisoning_2019,
                             "<br>Gender: ", gender)) %>%
  addLegend(pal = colorpal_gender, 
            values = ~gender, 
            title = "Gender", 
            position = "bottomright")

3.16 Map of mortality rate unintentional poisoning for both gender

# Replace NAs in 'gender' column
countries_sf_both$gender <- replace_na(countries_sf_both$gender, "missing")

leaflet(countries_sf_both) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorpal_gender(gender),  # Assign color based on gender
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~iso,  
              popup = ~paste("Country Code: ", iso,
                             "<br>Country: ", country,
                             "<br>Mortality Rate Unintentional Poisoning per 100,000: ", mortality_rate_unintentional_poisoning_2019,
                             "<br>Gender: ", gender)) %>%
  addLegend(pal = colorpal_gender, 
            values = ~gender, 
            title = "Gender", 
            position = "bottomright")

4 Plots of Indicator between mortality rate and population with basic handwashing facilities

countries_sf <- countries_sf[!is.na(countries_sf$mortality_rate_unintentional_poisoning_2019) & !is.na(countries_sf$pop_with_basic_handwashing_facilites_2019), ]

countries_sf <- countries_sf %>%
  filter(gender == 'Both sexes')

countries_sf
Simple feature collection with 313 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -172.7497 ymin: -34.80869 xmax: 166.1479 ymax: 55.38515
Geodetic CRS:  WGS 84
# A tibble: 313 × 8
   country                   continent gender area  iso   mortality_rate_unint…¹
 * <chr>                     <chr>     <chr>  <chr> <chr>                  <dbl>
 1 Indonesia                 Asia      Both … All … IDN                      0.3
 2 Indonesia                 Asia      Both … Rural IDN                      0.3
 3 Indonesia                 Asia      Both … Urban IDN                      0.3
 4 Bolivia (Plurinational S… South Am… Both … All … BOL                      0.6
 5 Bolivia (Plurinational S… South Am… Both … Rural BOL                      0.6
 6 Bolivia (Plurinational S… South Am… Both … Urban BOL                      0.6
 7 Peru                      South Am… Both … Rural PER                      0.4
 8 India                     Asia      Both … Urban IND                      0.3
 9 India                     Asia      Both … All … IND                      0.3
10 India                     Asia      Both … Rural IND                      0.3
# ℹ 303 more rows
# ℹ abbreviated name: ¹​mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
#   geometry <MULTIPOLYGON [°]>

4.1 Correlation

# Plot the data
plot_loess <- ggplot(countries_sf, aes(x = mortality_rate_unintentional_poisoning_2019, y = pop_with_basic_handwashing_facilites_2019)) +
  
  geom_point(aes(color = area, 
                 shape = area,
                 text = paste("Country:", country, "<br>", "Location:", area)), alpha = 0.7) +
  
  geom_smooth(data = subset(countries_sf, area == "All areas"),
              method = loess, se = FALSE,
              aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
              show.legend = TRUE) +
  
  geom_smooth(data = subset(countries_sf, area == "Urban"),
              method = loess, se = FALSE,
              aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
              show.legend = TRUE) +

  geom_smooth(data = subset(countries_sf, area == "Rural"),
              method = loess, se = FALSE,
              aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
              show.legend = TRUE) +
  
  labs(x = "Mortality Rate (deaths per 100,000 population) for 2019 ",
       y = "Population with Basic Handwashing Facilities for 2019 (%)",
       title = "Correlation between Mortality Rate and Basic Handwashing Facilities \n for Both Gender",
       color = "Location Type",
       shape = "Location Type") +
  
  theme_minimal() +
  
  guides(color = guide_legend(override.aes = list(shape = 16)),
         shape = guide_legend(override.aes = list(color = "black")))
Warning in geom_point(aes(color = area, shape = area, text = paste("Country:",
: Ignoring unknown aesthetics: text
# Convert to a plotly plot
gp_map_1 <- ggplotly(plot_loess, tooltip = "text")
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
gp_map_1
correlation_all <- cor(countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "All areas"], 
                       countries_sf$mortality_rate_unintentional_poisoning_2019[countries_sf$area == "All areas"])

correlation_urban <- cor(countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "Urban"], 
                         countries_sf$mortality_rate_unintentional_poisoning_2019[countries_sf$area == "Urban"])

correlation_rural <- cor(countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "Rural"], 
                         countries_sf$mortality_rate_unintentional_poisoning_2019[countries_sf$area == "Rural"])

blank_plot <- ggplot() +
  theme_void() +
  theme(plot.margin = margin(1, 1, 1, 1, "cm")) +
  annotate("text",
           x = 0.5,
           y = 0.5,
           label = paste("Correlation All:",
                         round(correlation_all, 2), "\n",
                         "Correlation Urban:",
                         round(correlation_urban, 2), "\n",
                         "Correlation Rural:",
                         round(correlation_rural, 2)),
           size = 5,
           color = "black",
           hjust = 0.5)

# Convert to a plotly plot
gp_text_map_1 <- ggplotly(blank_plot)

gp_text_map_1
stacked_bar_chart <- ggplot(countries_sf, aes(x = mortality_rate_unintentional_poisoning_2019, fill = area)) +
  geom_bar(position = "stack") +
  labs(x = "Mortality Rate (deaths per 100,000 population)",
       y = "Count",
       title = "Stacked Bar Chart of Mortality Rates") +
  theme_minimal()

# Convert to a plotly plot
ggplot_stacked_bar_chart <- ggplotly(stacked_bar_chart)

# Create a grouped bar chart
grouped_bar_chart <- ggplot(countries_sf, aes(x = mortality_rate_unintentional_poisoning_2019, fill = area)) +
  geom_bar(position = "dodge") +
  labs(x = "Mortality Rate (deaths per 100,000 population)",
       y = "Count",
       title = "Grouped Bar Chart of Mortality Rates") +
  theme_minimal()

# Convert to a plotly plot
ggplot_grouped_bar_chart <- ggplotly(grouped_bar_chart)

4.2 Barchart and stacked barchart (without Transformation)

ggplot_stacked_bar_chart <- ggplotly(stacked_bar_chart, width = 1000, height = 600, autosize = TRUE)
ggplot_grouped_bar_chart <- ggplotly(grouped_bar_chart, width = 1000, height = 600, autosize = TRUE)

ggplot_stacked_bar_chart
ggplot_grouped_bar_chart

4.3 Normalise Data

#Ensure you have the MASS library installed

countries_sf$mortality_rate_unintentional_poisoning_2019.positive <- countries_sf$mortality_rate_unintentional_poisoning_2019 + abs(min(countries_sf$mortality_rate_unintentional_poisoning_2019)) + 0.1
# Apply the Box-Cox transformation
bc_result <- MASS::boxcox(countries_sf$mortality_rate_unintentional_poisoning_2019.positive ~ 1, 
                    lambda = seq(-3,3,0.1))

# The optimal lambda value is the one that maximizes the log-likelihood
optimal_lambda <- bc_result$x[which.max(bc_result$y)]

# Transform the data using the optimal lambda value
if (optimal_lambda == 0) {
  countries_sf$latest_value.x_bc <- log(countries_sf$mortality_rate_unintentional_poisoning_2019.positive)
} else {
  countries_sf$latest_value.x_bc <- (countries_sf$mortality_rate_unintentional_poisoning_2019.positive^optimal_lambda - 1) / optimal_lambda
}

# Shift the transformed variable to be non-negative
min_value <- min(countries_sf$latest_value.x_bc)
countries_sf$latest_value.x_bc <- countries_sf$latest_value.x_bc + abs(min_value) + 0.1

4.4 Histogram (Normalised)

# Round the transformed mortality rates to the nearest whole number
countries_sf$latest_value.x_bc_rounded <- round(countries_sf$latest_value.x_bc)

# Convert the latest_value.x_bc_rounded variable into a categorical variable
countries_sf$latest_value.x_bc_cat <- cut(countries_sf$latest_value.x_bc_rounded, breaks = 50)

# Create a stacked bar chart
stacked_bar_chart_bc <- ggplot(countries_sf, aes(x = latest_value.x_bc_rounded, fill = area)) +
  geom_bar(position = "stack", bins =20) +
  labs(x = "Transformed Mortality Rate",
       y = "Count",
       title = "Stacked Bar Chart of Transformed Mortality Rates") +
  theme_minimal()
Warning in geom_bar(position = "stack", bins = 20): Ignoring unknown
parameters: `bins`
ggplot_stacked_bar_chart_bc <- ggplotly(stacked_bar_chart_bc)
ggplot_stacked_bar_chart_bc
# Create a grouped bar chart
grouped_bar_chart_bc <- ggplot(countries_sf, aes(x = latest_value.x_bc_rounded, fill = area)) +
  geom_bar(position = "dodge") +
  labs(x = "Transformed Mortality Rate",
       y = "Count",
       title = "Grouped Bar Chart of Transformed Mortality Rates") +
  theme_minimal()

# Convert to a plotly plot
ggplot_grouped_bar_chart_bc <- ggplotly(grouped_bar_chart_bc)
ggplot_grouped_bar_chart_bc

4.5 Scatter plot Normalised

plot_loess <- ggplot(countries_sf, aes(x = latest_value.x_bc, y = pop_with_basic_handwashing_facilites_2019)) +
  
  geom_point(aes(color = area, 
                 shape = area,
                 text = paste("Country:", country, "<br>", "Location:", area)), alpha = 0.7) +
  
  geom_smooth(data = subset(countries_sf, area == "All areas"),
              method = loess, se = FALSE, 
              aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
              show.legend = TRUE) +
  
  geom_smooth(data = subset(countries_sf, area == "Urban"),
              method = loess,
              se = FALSE,
              aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
              show.legend = TRUE) +

  geom_smooth(data = subset(countries_sf, area == "Rural"),
              method = loess,
              se = FALSE,
              aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
              show.legend = TRUE) +
  
  labs(x = "Transformed Mortality Rate (deaths per 100,000 population) for 2019",
       y = "Population with Basic Handwashing Facilities for 2019 (%)",
       title = "Correlation between Mortality Rate and Basic Handwashing Facilities (Normalised) for Both Gender",
       color = "Location Type",
       shape = "Location Type") +
  
  theme_minimal() +
  
  guides(color = guide_legend(override.aes = list(shape = 16)),
         shape = guide_legend(override.aes = list(color = "black")))
Warning in geom_point(aes(color = area, shape = area, text = paste("Country:",
: Ignoring unknown aesthetics: text
gp_normalised_map_1 <- ggplotly(plot_loess, tooltip = "text")
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
gp_normalised_map_1

4.6 Correlation Normalised

correlation_all <- cor(countries_sf$latest_value.x_bc[countries_sf$area == "All areas"], 
                       countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "All areas"])

correlation_urban <- cor(countries_sf$latest_value.x_bc[countries_sf$area == "Urban"], 
                         countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "Urban"])

correlation_rural <- cor(countries_sf$latest_value.x_bc[countries_sf$area == "Rural"], 
                         countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "Rural"])

blank_plot <- ggplot() +
  theme_void() +
  theme(plot.margin = margin(1, 1, 1, 1, "cm")) +
  annotate("text",
           x = 0.5,
           y = 0.5,
           label = paste("Correlation All:",
                         round(correlation_all, 2), "\n",
                         "Correlation Urban:",
                         round(correlation_urban, 2), "\n",
                         "Correlation Rural:",
                         round(correlation_rural, 2)),
           size = 5,
           color = "black",
           hjust = 0.5)

gp_text_normalised_map_1 <- ggplotly(blank_plot)
gp_text_normalised_map_1

4.7 Combining normal and unormalised plots

combined_plot_map_1 <- subplot(
  gp_map_1, gp_text_map_1, 
  gp_normalised_map_1, gp_text_normalised_map_1, 
  nrows = 2, margin = 0.05
)

# Print the combined plot
combined_plot_map_1

5 Map of Indicator 2

6 Retrieve Data

6.1 Mortality Rate due to Unsafe Water

mortality_rate_unsafe_water_map <- dplyr::select(
  mortality_rate_unsafe_water,
  code = 'geoAreaCode',
  country = 'geoAreaName',
  region = 'parentName',
  iso = 'ISO3',
  mortality_rate_unsafe_water = 'latest_value'
)

mortality_rate_unsafe_water_map <- mortality_rate_unsafe_water_map%>%
  distinct()

mortality_rate_unsafe_water_map
# A tibble: 183 × 5
    code country                          region    iso   mortality_rate_unsaf…¹
   <dbl> <chr>                            <chr>     <chr>                  <dbl>
 1   583 Micronesia (Federated States of) Oceania … FSM                      3.6
 2   496 Mongolia                         Eastern … MNG                      1.3
 3   499 Montenegro                       Southern… MNE                      0  
 4   504 Morocco                          Northern… MAR                      1.9
 5   508 Mozambique                       Eastern … MOZ                     27.6
 6   104 Myanmar                          South-Ea… MMR                     12.6
 7   516 Namibia                          Southern… NAM                     18.3
 8   524 Nepal                            Southern… NPL                     19.8
 9   528 Netherlands                      Western … NLD                      0.2
10   554 New Zealand                      Australi… NZL                      0.1
# ℹ 173 more rows
# ℹ abbreviated name: ¹​mortality_rate_unsafe_water

6.2 Proportion of safe water management

proportion_of_safe_water_map <- dplyr::select(
  proportion_of_safe_water,
  code = 'geoAreaCode',
  country = 'geoAreaName',
  region = 'parentName',
  iso = 'ISO3',
  urbanisation = "location_desc",
  value_2019 = "value_2019"
)

proportion_of_safe_water_map <- proportion_of_safe_water_map%>%
  distinct()

proportion_of_safe_water_map
# A tibble: 286 × 6
    code country                            region iso   urbanisation value_2019
   <dbl> <chr>                              <chr>  <chr> <chr>             <dbl>
 1   807 North Macedonia                    South… MKD   Rural                66
 2   804 Ukraine                            Easte… UKR   Urban                89
 3   807 North Macedonia                    South… MKD   Urban                85
 4   826 United Kingdom of Great Britain a… North… GBR   All areas           100
 5   580 Northern Mariana Islands           Ocean… MNP   All areas            91
 6   840 United States of America           North… USA   All areas            97
 7   578 Norway                             North… NOR   All areas            99
 8   512 Oman                               Weste… OMN   All areas            90
 9   586 Pakistan                           South… PAK   All areas            36
10   586 Pakistan                           South… PAK   Rural                33
# ℹ 276 more rows

7 Cleaning and fixing geometry data

countries_geom <- read_sf("./data/WB_countries_Admin0.geojson")
countries_boundary <- st_boundary(countries_geom)
countries_sf_simplified <- ms_simplify(countries_boundary, keep = 0.05)
countries_sf_valid <- st_make_valid(countries_sf_simplified)
countries_wrapped <- st_wrap_dateline(countries_sf_valid)
polygon <- st_polygon(x = list(rbind(c(-180.0001, 90),
                                     c(-179.9999, 90),
                                     c(-179.9999, -90),
                                     c(-180.0001, -90),
                                     c(-180.0001, 90)))) |>
  st_sfc() |>
  st_set_crs(4326)

countries_wrapped <- countries_wrapped |>
  st_difference(polygon)
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
land_sf <- read_sf("data/WB_Land.geojson")
land_boundary <- st_boundary(land_sf)
land_sf_simplified <- ms_simplify(land_boundary, keep = 0.05)
land_sf_valid <- st_make_valid(land_sf_simplified)
land_wrapped <- st_wrap_dateline(land_sf_valid)
polygon <- st_polygon(x = list(rbind(c(-180.0001, 90),
                                     c(-179.9999, 90),
                                     c(-179.9999, -90),
                                     c(-180.0001, -90),
                                     c(-180.0001, 90)))) |>
  st_sfc() |>
  st_set_crs(4326)

land_wrapped <- land_wrapped |>
  st_difference(polygon)
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
countries_wrapped <- countries_wrapped |>
  st_difference(polygon)
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
# Washing data combined
countries_sf_water <- left_join(
    countries_wrapped,
  proportion_of_safe_water_map,
  by = c("WB_A3" = "iso")
)
Warning in sf_column %in% names(g): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 3 of `x` matches multiple rows in `y`.
ℹ Row 6 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
countries_sf_water
Simple feature collection with 325 features and 57 fields
Geometry type: MULTILINESTRING
Dimension:     XY
Bounding box:  xmin: -179.9999 ymin: -55.62754 xmax: 179.9999 ymax: 83.6341
Geodetic CRS:  WGS 84
# A tibble: 325 × 58
   REGION_WB   ISO_N3 ISO_A3_EH NAME_AR ISO_A3 GDP_MD_EST NAME_VI WB_NAME ISO_A2
   <chr>       <chr>  <chr>     <chr>   <chr>       <dbl> <chr>   <chr>   <chr> 
 1 East Asia … 360    IDN       إندوني… IDN       3028000 Indone… Indone… ID    
 2 East Asia … 458    MYS       ماليزيا MYS        863000 Malays… Malays… MY    
 3 Latin Amer… 152    CHL       تشيلي   CHL        436100 Chile   Chile   CL    
 4 Latin Amer… 152    CHL       تشيلي   CHL        436100 Chile   Chile   CL    
 5 Latin Amer… 068    BOL       بوليفيا BOL         78350 Bolivia Bolivia BO    
 6 Latin Amer… 604    PER       بيرو    PER        410400 Peru    Peru    PE    
 7 Latin Amer… 604    PER       بيرو    PER        410400 Peru    Peru    PE    
 8 Latin Amer… 604    PER       بيرو    PER        410400 Peru    Peru    PE    
 9 Latin Amer… 032    ARG       الأرجن… ARG        879400 Argent… Argent… AR    
10 Europe & C… 196    CYP       قبرص    CYP         29260 Cộng h… Cyprus  CY    
# ℹ 315 more rows
# ℹ 49 more variables: UN_A3 <chr>, FIPS_10_ <chr>, NAME_EN <chr>,
#   NAME_EL <chr>, WB_RULES <chr>, OBJECTID <int>, NAME_DE <chr>,
#   SUBREGION <chr>, featurecla <chr>, NAME_ZH <chr>, WB_A3 <chr>,
#   LASTCENSUS <int>, TYPE <chr>, NAME_PT <chr>, NAME_BN <chr>,
#   FORMAL_EN <chr>, POP_YEAR <int>, REGION_UN <chr>, LEVEL <int>,
#   POP_RANK <int>, NAME_IT <chr>, FORMAL_FR <chr>, CONTINENT <chr>, …
# Mortality Rate Combined
countries_sf_mortality <- left_join(
    countries_wrapped,
  mortality_rate_unsafe_water_map,
  by = c("WB_A3" = "iso")
)

countries_sf_mortality
Simple feature collection with 197 features and 56 fields
Geometry type: MULTILINESTRING
Dimension:     XY
Bounding box:  xmin: -179.9999 ymin: -55.62754 xmax: 179.9999 ymax: 83.6341
Geodetic CRS:  WGS 84
# A tibble: 197 × 57
   REGION_WB   ISO_N3 ISO_A3_EH NAME_AR ISO_A3 GDP_MD_EST NAME_VI WB_NAME ISO_A2
   <chr>       <chr>  <chr>     <chr>   <chr>       <dbl> <chr>   <chr>   <chr> 
 1 East Asia … 360    IDN       إندوني… IDN       3028000 Indone… Indone… ID    
 2 East Asia … 458    MYS       ماليزيا MYS        863000 Malays… Malays… MY    
 3 Latin Amer… 152    CHL       تشيلي   CHL        436100 Chile   Chile   CL    
 4 Latin Amer… 068    BOL       بوليفيا BOL         78350 Bolivia Bolivia BO    
 5 Latin Amer… 604    PER       بيرو    PER        410400 Peru    Peru    PE    
 6 Latin Amer… 032    ARG       الأرجن… ARG        879400 Argent… Argent… AR    
 7 Europe & C… 196    CYP       قبرص    CYP         29260 Cộng h… Cyprus  CY    
 8 South Asia  356    IND       الهند   IND       8721000 Ấn Độ   India   IN    
 9 East Asia … 156    CHN       جمهوري… CHN      21140000 Cộng h… China   CN    
10 Middle Eas… 376    ISR       إسرائيل ISR        297000 Israel  Israel  IL    
# ℹ 187 more rows
# ℹ 48 more variables: UN_A3 <chr>, FIPS_10_ <chr>, NAME_EN <chr>,
#   NAME_EL <chr>, WB_RULES <chr>, OBJECTID <int>, NAME_DE <chr>,
#   SUBREGION <chr>, featurecla <chr>, NAME_ZH <chr>, WB_A3 <chr>,
#   LASTCENSUS <int>, TYPE <chr>, NAME_PT <chr>, NAME_BN <chr>,
#   FORMAL_EN <chr>, POP_YEAR <int>, REGION_UN <chr>, LEVEL <int>,
#   POP_RANK <int>, NAME_IT <chr>, FORMAL_FR <chr>, CONTINENT <chr>, …

8 Creating the Maps

8.1 Get the different values for all areas, urban and rural in water management

countries_sf_water_all <- countries_sf_water %>% filter(!(urbanisation %in% c("Rural","Urban")))
countries_sf_water_urban <- countries_sf_water %>% filter(!(urbanisation %in% c("All area","Urban")))
countries_sf_water_rural <- countries_sf_water %>% filter(!(urbanisation %in% c("Urban","All areas")))

8.1.1 all areas for water management

# Load the required libraries
library(leaflet)

# Define the color palette function
colorPalette <- colorRampPalette(c("#FF6969", "#00A0D6"))


# Generate the color palette based on the number of breaks
numBreaks <- 6
colors <- colorPalette(numBreaks)

# Create a Leaflet map
all_map <- leaflet(countries_sf_water_all) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorNumeric(palette=colors, domain = c(0, 20, 40, 60, 80, 100))(value_2019),  
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~WB_A3,  
              popup = ~paste("Country Code: ", WB_A3,
                             "<br>Country: ", country,
                             "<br>Proportion of population access to safely managed drinking water service: ", value_2019))%>%
  addLegend(
    position = "bottomright",
    pal = colorNumeric(palette = colors, domain = c(0, 100)),
    values = c(0, 20, 40, 60, 80, 100),
    title = "Proportion (%)",
    labels = c("0-20", "20-40", "40-60", "60-80", "80-100"),
    opacity = 1
  )

all_map

8.1.2 Urban areas

# Define the color palette function
colorPalette <- colorRampPalette(c("#FF6969", "#00A0D6"))


# Generate the color palette based on the number of breaks
numBreaks <- 6
colors <- colorPalette(numBreaks)

# Create a Leaflet map
urban_map <- leaflet(countries_sf_water_urban) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorNumeric(palette=colors, domain = c(0, 20, 40, 60, 80, 100))(value_2019),  
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~WB_A3,
              popup = ~paste("Country Code: ", WB_A3,
                             "<br>Country: ", country,
                             "<br>Proportion of population access to safely managed drinking water service: ", value_2019))%>%
  addLegend(
    position = "bottomright",
    pal = colorNumeric(palette = colors, domain = c(0, 100)),
    values = c(0, 20, 40, 60, 80, 100),
    title = "Proportion (%)",
    labels = c("0-20", "20-40", "40-60", "60-80", "80-100"),
    opacity = 1
  )

urban_map

8.1.3 Rural

# Define the color palette function
colorPalette <- colorRampPalette(c("#FF6969", "#00A0D6"))


# Generate the color palette based on the number of breaks
numBreaks <- 6
colors <- colorPalette(numBreaks)

# Create a Leaflet map
rural_map <- leaflet(countries_sf_water_rural) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorNumeric(palette=colors, domain = c(0, 20, 40, 60, 80, 100))(value_2019),  
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~WB_A3,  
              popup = ~paste("Country Code: ", WB_A3,
                             "<br>Country: ", country,
                             "<br>Proportion of population access to safely managed drinking water service: ", value_2019))%>%
  addLegend(
    position = "bottomright",
    pal = colorNumeric(palette = colors, domain = c(0, 100)),
    values = c(0, 20, 40, 60, 80, 100),
    title = "Proportion (%)",
    labels = c("0-20", "20-40", "40-60", "60-80", "80-100"),
    opacity = 1
  )

rural_map

8.2 Create map for mortality rate

# Define the color palette function
colorPalette <- colorRampPalette(c("#FFFFDF", "#FFEEAA", "#FFBB55", "#FF7700", "#FF4400"))


# Generate the color palette based on the number of breaks
numBreaks <- 6
colors <- colorPalette(numBreaks)

# Create a Leaflet map
mortality_mapping <- leaflet(countries_sf_mortality) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~colorNumeric(palette=colors, domain = c(0, 20, 40, 60, 80, 100))(mortality_rate_unsafe_water),  
              fillOpacity = 0.8,
              weight = 2,
              opacity = 1,
              color = "grey",
              dashArray = "3",
              highlight = highlightOptions(
                weight = 5,
                color = "#666",
                dashArray = "",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              label = ~WB_A3,  
              popup = ~paste("Country Code: ", WB_A3,
                             "<br>Country: ", country,
                             "<br>Mortality Rate from Unsafe Sanitation and Water per 100,000: ", mortality_rate_unsafe_water))%>%
  addLegend(
    position = "bottomright",
    pal = colorNumeric(palette = colors, domain = c(0, 100)),
    values = c(0, 20, 40, 60, 80, 100),
    title = "Proportion (100, 000)",
    labels = c("0-20", "20-40", "40-60", "60-80", "80-100"),
    opacity = 1
  )
Warning in colorNumeric(palette = colors, domain = c(0, 20, 40, 60, 80, : Some
values were outside the color scale and will be treated as NA
mortality_mapping
# Plots of Indicator between mortality rate due to unsafe drinking water and proportion of population using safely managed drinking water services.
merged_data <- merge(mortality_rate_unsafe_water, proportion_of_safe_water, by=c('geoAreaCode', 'geoAreaName'))
merged_data <- merged_data[!is.na(merged_data$value_2019), ] #remove NA values for 2019
# Display the first few rows of the merged dataframe
head(merged_data)
  geoAreaCode geoAreaName ObjectId.x goal_code.x goal_labelEN.x
1         100    Bulgaria        113           3         Goal 3
2         104     Myanmar          7           3         Goal 3
3         104     Myanmar          7           3         Goal 3
4         104     Myanmar          7           3         Goal 3
5         112     Belarus        102           3         Goal 3
6         116    Cambodia        117           3         Goal 3
                                                    goal_descEN.x target_code.x
1 Ensure healthy lives and promote well-being for all at all ages           3.9
2 Ensure healthy lives and promote well-being for all at all ages           3.9
3 Ensure healthy lives and promote well-being for all at all ages           3.9
4 Ensure healthy lives and promote well-being for all at all ages           3.9
5 Ensure healthy lives and promote well-being for all at all ages           3.9
6 Ensure healthy lives and promote well-being for all at all ages           3.9
                                                                                                                                target_descEN.x
1 By 2030, substantially reduce the number of deaths and illnesses from hazardous chemicals and air, water and soil pollution and contamination
2 By 2030, substantially reduce the number of deaths and illnesses from hazardous chemicals and air, water and soil pollution and contamination
3 By 2030, substantially reduce the number of deaths and illnesses from hazardous chemicals and air, water and soil pollution and contamination
4 By 2030, substantially reduce the number of deaths and illnesses from hazardous chemicals and air, water and soil pollution and contamination
5 By 2030, substantially reduce the number of deaths and illnesses from hazardous chemicals and air, water and soil pollution and contamination
6 By 2030, substantially reduce the number of deaths and illnesses from hazardous chemicals and air, water and soil pollution and contamination
  indicator_code.x indicator_reference.x
1          C030902                 3.9.2
2          C030902                 3.9.2
3          C030902                 3.9.2
4          C030902                 3.9.2
5          C030902                 3.9.2
6          C030902                 3.9.2
                                                                                                                                           indicator_descEN.x
1 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (exposure to unsafe Water, Sanitation and Hygiene for All (WASH) services)
2 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (exposure to unsafe Water, Sanitation and Hygiene for All (WASH) services)
3 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (exposure to unsafe Water, Sanitation and Hygiene for All (WASH) services)
4 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (exposure to unsafe Water, Sanitation and Hygiene for All (WASH) services)
5 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (exposure to unsafe Water, Sanitation and Hygiene for All (WASH) services)
6 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (exposure to unsafe Water, Sanitation and Hygiene for All (WASH) services)
  series_release.x
1     2021.Q2.G.03
2     2021.Q2.G.03
3     2021.Q2.G.03
4     2021.Q2.G.03
5     2021.Q2.G.03
6     2021.Q2.G.03
                                                                                                   series_tags.x
1 ['hygiene', 'health', 'mortality', 'death', 'water-related diseases', 'sanitation', 'water', 'drinking water']
2 ['hygiene', 'health', 'mortality', 'death', 'water-related diseases', 'sanitation', 'water', 'drinking water']
3 ['hygiene', 'health', 'mortality', 'death', 'water-related diseases', 'sanitation', 'water', 'drinking water']
4 ['hygiene', 'health', 'mortality', 'death', 'water-related diseases', 'sanitation', 'water', 'drinking water']
5 ['hygiene', 'health', 'mortality', 'death', 'water-related diseases', 'sanitation', 'water', 'drinking water']
6 ['hygiene', 'health', 'mortality', 'death', 'water-related diseases', 'sanitation', 'water', 'drinking water']
     series.x
1 SH_STA_WASH
2 SH_STA_WASH
3 SH_STA_WASH
4 SH_STA_WASH
5 SH_STA_WASH
6 SH_STA_WASH
                                                                                               seriesDescription.x
1 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (deaths per 100,000 population)
2 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (deaths per 100,000 population)
3 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (deaths per 100,000 population)
4 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (deaths per 100,000 population)
5 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (deaths per 100,000 population)
6 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (deaths per 100,000 population)
  level_.x parentCode.x       parentName.x  type.x       X.x      Y.x ISO3.x
1        5          151     Eastern Europe Country  25.23763 42.75731    BGR
2        4           35 South-Eastern Asia Country  96.51752 21.19333    MMR
3        4           35 South-Eastern Asia Country  96.51752 21.19333    MMR
4        4           35 South-Eastern Asia Country  96.51752 21.19333    MMR
5        5          151     Eastern Europe Country  28.04940 53.54193    BLR
6        4           35 South-Eastern Asia Country 104.92284 12.71164    KHM
  UN_Member.x Country_Profile.x UnitMultiplier.x   Units_code.x
1           1                 1               NA PER_100000_POP
2           1                 1               NA PER_100000_POP
3           1                 1               NA PER_100000_POP
4           1                 1               NA PER_100000_POP
5           1                 1               NA PER_100000_POP
6           1                 1               NA PER_100000_POP
            Units_desc.x timeSeries_id.x timeSeries_keys.x n_years.x min_year.x
1 Per 100,000 population     SH_STA_WASH                NA         1       2016
2 Per 100,000 population     SH_STA_WASH                NA         1       2016
3 Per 100,000 population     SH_STA_WASH                NA         1       2016
4 Per 100,000 population     SH_STA_WASH                NA         1       2016
5 Per 100,000 population     SH_STA_WASH                NA         1       2016
6 Per 100,000 population     SH_STA_WASH                NA         1       2016
  max_year.x years.x value_2016.x latest_value.x footnotes.x          nature.x
1       2016  [2016]          0.1            0.1          NA E: Estimated data
2       2016  [2016]         12.6           12.6          NA E: Estimated data
3       2016  [2016]         12.6           12.6          NA E: Estimated data
4       2016  [2016]         12.6           12.6          NA E: Estimated data
5       2016  [2016]          0.1            0.1          NA E: Estimated data
6       2016  [2016]          6.5            6.5          NA E: Estimated data
  ObjectId.y goal_code.y goal_labelEN.y
1        263           6         Goal 6
2        204           6         Goal 6
3        203           6         Goal 6
4        202           6         Goal 6
5        250           6         Goal 6
6        264           6         Goal 6
                                                                   goal_descEN.y
1 Ensure availability and sustainable management of water and sanitation for all
2 Ensure availability and sustainable management of water and sanitation for all
3 Ensure availability and sustainable management of water and sanitation for all
4 Ensure availability and sustainable management of water and sanitation for all
5 Ensure availability and sustainable management of water and sanitation for all
6 Ensure availability and sustainable management of water and sanitation for all
  target_code.y
1           6.1
2           6.1
3           6.1
4           6.1
5           6.1
6           6.1
                                                                                target_descEN.y
1 By 2030, achieve universal and equitable access to safe and affordable drinking water for all
2 By 2030, achieve universal and equitable access to safe and affordable drinking water for all
3 By 2030, achieve universal and equitable access to safe and affordable drinking water for all
4 By 2030, achieve universal and equitable access to safe and affordable drinking water for all
5 By 2030, achieve universal and equitable access to safe and affordable drinking water for all
6 By 2030, achieve universal and equitable access to safe and affordable drinking water for all
  indicator_code.y indicator_reference.y
1          C060101                 6.1.1
2          C060101                 6.1.1
3          C060101                 6.1.1
4          C060101                 6.1.1
5          C060101                 6.1.1
6          C060101                 6.1.1
                                                     indicator_descEN.y
1 Proportion of population using safely managed drinking water services
2 Proportion of population using safely managed drinking water services
3 Proportion of population using safely managed drinking water services
4 Proportion of population using safely managed drinking water services
5 Proportion of population using safely managed drinking water services
6 Proportion of population using safely managed drinking water services
  series_release.y               series_tags.y    series.y
1     2021.Q2.G.03 ['water', 'drinking water'] SH_H2O_SAFE
2     2021.Q2.G.03 ['water', 'drinking water'] SH_H2O_SAFE
3     2021.Q2.G.03 ['water', 'drinking water'] SH_H2O_SAFE
4     2021.Q2.G.03 ['water', 'drinking water'] SH_H2O_SAFE
5     2021.Q2.G.03 ['water', 'drinking water'] SH_H2O_SAFE
6     2021.Q2.G.03 ['water', 'drinking water'] SH_H2O_SAFE
                                                                        seriesDescription.y
1 Proportion of population using safely managed drinking water services, by urban/rural (%)
2 Proportion of population using safely managed drinking water services, by urban/rural (%)
3 Proportion of population using safely managed drinking water services, by urban/rural (%)
4 Proportion of population using safely managed drinking water services, by urban/rural (%)
5 Proportion of population using safely managed drinking water services, by urban/rural (%)
6 Proportion of population using safely managed drinking water services, by urban/rural (%)
  level_.y parentCode.y       parentName.y  type.y       X.y      Y.y ISO3.y
1        5          151     Eastern Europe Country  25.23763 42.75731    BGR
2        4           35 South-Eastern Asia Country  96.51752 21.19333    MMR
3        4           35 South-Eastern Asia Country  96.51752 21.19333    MMR
4        4           35 South-Eastern Asia Country  96.51752 21.19333    MMR
5        5          151     Eastern Europe Country  28.04940 53.54193    BLR
6        4           35 South-Eastern Asia Country 104.92284 12.71164    KHM
  UN_Member.y Country_Profile.y location_code location_desc UnitMultiplier.y
1           1                 1            _T     All areas               NA
2           1                 1             U         Urban               NA
3           1                 1             R         Rural               NA
4           1                 1            _T     All areas               NA
5           1                 1            _T     All areas               NA
6           1                 1            _T     All areas               NA
  Units_code.y Units_desc.y timeSeries_id.y timeSeries_keys.y n_years.y
1           PT   Percentage SH_H2O_SAFE___T          location        21
2           PT   Percentage  SH_H2O_SAFE__U          location        21
3           PT   Percentage  SH_H2O_SAFE__R          location        21
4           PT   Percentage SH_H2O_SAFE___T          location        21
5           PT   Percentage SH_H2O_SAFE___T          location        21
6           PT   Percentage SH_H2O_SAFE___T          location        21
  min_year.y max_year.y
1       2000       2020
2       2000       2020
3       2000       2020
4       2000       2020
5       2000       2020
6       2000       2020
                                                                                                                         years.y
1 [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
2 [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
3 [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
4 [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
5 [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
6 [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
  value_2000 value_2001 value_2002 value_2003 value_2004 value_2005 value_2006
1         90         90         91         91         92         92         93
2         51         52         54         56         58         59         61
3         18         20         21         23         24         26         27
4         27         29         30         32         33         35         37
5         81         81         81         81         83         84         85
6         17         17         18         18         19         19         20
  value_2007 value_2008 value_2009 value_2010 value_2011 value_2012 value_2013
1         93         94         94         95         95         96         96
2         63         65         67         68         70         70         71
3         29         30         32         34         35         37         39
4         38         40         42         44         45         47         48
5         86         88         89         90         91         92         93
6         20         21         21         22         23         23         24
  value_2014 value_2015 value_2016.y value_2017 value_2018 value_2019
1         97         97           97         97         97         98
2         71         71           72         72         73         73
3         41         43           45         47         49         51
4         50         51           53         54         56         58
5         94         94           95         95         95         95
6         24         25           25         26         27         27
  value_2020 latest_value.y footnotes.y          nature.y
1         98             98          NA E: Estimated data
2         74             74          NA E: Estimated data
3         52             52          NA E: Estimated data
4         59             59          NA E: Estimated data
5         95             95          NA E: Estimated data
6         28             28          NA E: Estimated data

8.3 Correlation

# Plot the data
plot_loess <- ggplot(merged_data, aes(x = latest_value.x, y = value_2019)) +
  
  geom_point(aes(color = location_desc, 
                 shape = location_desc,
                 text = paste("Country:", geoAreaName, "<br>", "Location:", location_desc)), alpha = 0.7) +
  
  geom_smooth(data = subset(merged_data, location_desc == "All areas"),
              method = loess, se = FALSE, 
              aes(color = location_desc, weight = value_2019),
              show.legend = TRUE) +
  
  geom_smooth(data = subset(merged_data, location_desc == "Urban"),
              method = loess,
              se = FALSE,
              aes(color = location_desc, weight = value_2019),
              show.legend = TRUE) +

  geom_smooth(data = subset(merged_data, location_desc == "Rural"),
              method = loess,
              se = FALSE,
              aes(color = location_desc, weight = value_2019),
              show.legend = TRUE) +
  
  labs(x = "Mortality Rate (deaths per 100,000 population)",
       y = "Safely Managed Drinking Water Services (%)",
       title = "Correlation between Mortality Rate and Water Services (2019)",
       color = "Location Type",
       shape = "Location Type") +
  
  theme_minimal() +
  
  guides(color = guide_legend(override.aes = list(shape = 16)),
         shape = guide_legend(override.aes = list(color = "black")))
Warning in geom_point(aes(color = location_desc, shape = location_desc, :
Ignoring unknown aesthetics: text
# Convert to a plotly plot
gp <- ggplotly(plot_loess, tooltip = "text")
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
# Print the plot
gp
correlation_all <- cor(merged_data$latest_value.x[merged_data$location_desc == "All areas"], 
                       merged_data$value_2019[merged_data$location_desc == "All areas"])

correlation_urban <- cor(merged_data$latest_value.x[merged_data$location_desc == "Urban"], 
                         merged_data$value_2019[merged_data$location_desc == "Urban"])

correlation_rural <- cor(merged_data$latest_value.x[merged_data$location_desc == "Rural"], 
                         merged_data$value_2019[merged_data$location_desc == "Rural"])

blank_plot <- ggplot() +
  theme_void() +
  theme(plot.margin = margin(1, 1, 1, 1, "cm")) +
  annotate("text",
           x = 0.5,
           y = 0.5,
           label = paste("Correlation All:",
                         round(correlation_all, 2), "\n",
                         "Correlation Urban:",
                         round(correlation_urban, 2), "\n",
                         "Correlation Rural:",
                         round(correlation_rural, 2)),
           size = 5,
           color = "black",
           hjust = 0.5)

# Convert to a plotly plot
gp_text <- ggplotly(blank_plot)
gp_text

8.4 Barchart and stacked barchart (without Transformation)

stacked_bar_chart <- ggplot(merged_data, aes(x = latest_value.x, fill = location_desc)) +
  geom_histogram(position = "stack", bins = 20) +
  labs(x = "Mortality Rate (deaths per 100,000 population)",
       y = "Count",
       title = "Stacked Bar Chart of Mortality Rates") +
  theme_minimal()

#change to plotly
ggplot_stacked_bar_chart <- ggplotly(stacked_bar_chart)
ggplot_stacked_bar_chart
grouped_bar_chart <- ggplot(merged_data, aes(x = latest_value.x, fill = location_desc)) +
  geom_histogram(position = "dodge", bins = 20) +
  labs(x = "Mortality Rate (deaths per 100,000 population)",
       y = "Count",
       title = "Grouped Bar Chart of Mortality Rates") +
  theme_minimal()

# Convert to a plotly plot
ggplot_grouped_bar_chart <- ggplotly(grouped_bar_chart)
ggplot_grouped_bar_chart
# Filter data for the histogram
merged_data_all<- subset(merged_data, location_desc == "All areas")  # change to rural or urban if you want

histogram <- ggplot(merged_data_all, aes(x = latest_value.x, 
                             text = paste("Country:", geoAreaName, 
                                          "<br>Mortality Rate:", latest_value.x))) +
  geom_histogram(bins = 50, fill = 'grey', color = 'black') +
  labs(x = 'Mortality Rate (deaths per 100,000 population)',
       y = 'Count',
       title = 'Histogram of Mortality Rates for "All" locations') +
  theme_minimal()

# Convert ggplot histogram to a plotly plot
plotly_histogram <- ggplotly(histogram, tooltip = "text")

8.5 Normalise Data

merged_data$latest_value.x_positive <- merged_data$latest_value.x + abs(min(merged_data$latest_value.x)) + 0.1

# Apply the Box-Cox transformation
bc_result <- MASS::boxcox(merged_data$latest_value.x_positive ~ 1, 
                    lambda = seq(-3,3,0.1))

# The optimal lambda value is the one that maximizes the log-likelihood
optimal_lambda <- bc_result$x[which.max(bc_result$y)]

# Transform the data using the optimal lambda value
if (optimal_lambda == 0) {
  merged_data$latest_value.x_bc <- log(merged_data$latest_value.x_positive)
} else {
  merged_data$latest_value.x_bc <- (merged_data$latest_value.x_positive^optimal_lambda - 1) / optimal_lambda
}

# Shift the transformed variable to be non-negative
min_value <- min(merged_data$latest_value.x_bc)
merged_data$latest_value.x_bc <- merged_data$latest_value.x_bc + abs(min_value) + 0.1

8.6 Histogram (Normalised)

# Round the transformed mortality rates to the nearest whole number
merged_data$latest_value.x_bc_rounded <- round(merged_data$latest_value.x_bc)

# Convert the latest_value.x_bc_rounded variable into a categorical variable
merged_data$latest_value.x_bc_cat <- cut(merged_data$latest_value.x_bc_rounded, breaks = 50)

# Create a stacked bar chart
stacked_bar_chart_bc <- ggplot(merged_data, aes(x = latest_value.x_bc_rounded, fill = location_desc)) +
  geom_bar(position = "stack", bins =20) +
  labs(x = "Transformed Mortality Rate",
       y = "Count",
       title = "Stacked Bar Chart of Transformed Mortality Rates") +
  theme_minimal()
Warning in geom_bar(position = "stack", bins = 20): Ignoring unknown
parameters: `bins`
ggplot_stacked_bar_chart_bc <- ggplotly(stacked_bar_chart_bc)
ggplot_stacked_bar_chart_bc
# Create a grouped bar chart
grouped_bar_chart_bc <- ggplot(merged_data, aes(x = latest_value.x_bc_rounded, fill = location_desc)) +
  geom_bar(position = "dodge") +
  labs(x = "Transformed Mortality Rate",
       y = "Count",
       title = "Grouped Bar Chart of Transformed Mortality Rates") +
  theme_minimal()

# Convert to a plotly plot
ggplot_grouped_bar_chart_bc <- ggplotly(grouped_bar_chart_bc)
ggplot_grouped_bar_chart_bc

8.7 Scatter plot Normalised

plot_loess <- ggplot(merged_data, aes(x = latest_value.x_bc, y = value_2019)) +
  
  geom_point(aes(color = location_desc, 
                 shape = location_desc,
                 text = paste("Country:", geoAreaName, "<br>", "Location:", location_desc)), alpha = 0.7) +
  
  geom_smooth(data = subset(merged_data, location_desc == "All areas"),
              method = loess, se = FALSE, 
              aes(color = location_desc, weight = value_2019),
              show.legend = TRUE) +
  
  geom_smooth(data = subset(merged_data, location_desc == "Urban"),
              method = loess,
              se = FALSE,
              aes(color = location_desc, weight = value_2019),
              show.legend = TRUE) +

  geom_smooth(data = subset(merged_data, location_desc == "Rural"),
              method = loess,
              se = FALSE,
              aes(color = location_desc, weight = value_2019),
              show.legend = TRUE) +
  
  labs(x = "Transformed Mortality Rate (deaths per 100,000 population)",
       y = "Safely Managed Drinking Water Services (%)",
       title = "Correlation between Transformed Mortality Rate and Water Services (Normalised)",
       color = "Location Type",
       shape = "Location Type") +
  
  theme_minimal() +
  
  guides(color = guide_legend(override.aes = list(shape = 16)),
         shape = guide_legend(override.aes = list(color = "black")))
Warning in geom_point(aes(color = location_desc, shape = location_desc, :
Ignoring unknown aesthetics: text
gp_normalised <- ggplotly(plot_loess, tooltip = "text")
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
gp_normalised

8.8 Correlation normalised

correlation_all <- cor(merged_data$latest_value.x_bc[merged_data$location_desc == "All areas"], 
                       merged_data$value_2019[merged_data$location_desc == "All areas"])

correlation_urban <- cor(merged_data$latest_value.x_bc[merged_data$location_desc == "Urban"], 
                         merged_data$value_2019[merged_data$location_desc == "Urban"])

correlation_rural <- cor(merged_data$latest_value.x_bc[merged_data$location_desc == "Rural"], 
                         merged_data$value_2019[merged_data$location_desc == "Rural"])

blank_plot <- ggplot() +
  theme_void() +
  theme(plot.margin = margin(1, 1, 1, 1, "cm")) +
  annotate("text",
           x = 0.5,
           y = 0.5,
           label = paste("Correlation All:",
                         round(correlation_all, 2), "\n",
                         "Correlation Urban:",
                         round(correlation_urban, 2), "\n",
                         "Correlation Rural:",
                         round(correlation_rural, 2)),
           size = 5,
           color = "black",
           hjust = 0.5)

gp_text_normalised <- ggplotly(blank_plot)
gp_text_normalised

8.9 Combining normal and unormalised plots

combined_plot <- subplot(
  gp, gp_text, 
  gp_normalised, gp_text_normalised, 
  nrows = 2, margin = 0.05
)

# Print the combined plot
combined_plot

9 Four Indicators

#mortality_rate_attributed_to_unsafe_water
mortality_rate_unsafe_water_indicators <- read_csv("data/indicator_3.9.2.csv",show_col_types = FALSE)

#Proportion_of_population_using_safely_managed_drinking_water_services
proportion_of_safe_water_indicators<- read_csv("data/indicator_6.1.1.csv",show_col_types = FALSE)

#mortality_rate_unintentional_poisoning
mortality_rate_unintentional_poisoning_indicators <- read_csv("data/indicator_3.9.3.csv",show_col_types = FALSE)

#proportion-of-population-with-basic-handwashing-facilities-on-premises-by-urban-rural-percent
population_with_basic_handwashing_facilities_indicators<- read_csv("data/indicator_6.2.1.csv",show_col_types = FALSE)
mortality_rate_unsafe_water_indicators <- dplyr::select(
  mortality_rate_unsafe_water_indicators,
  code = 'geoAreaCode',
  country = 'geoAreaName',
  region = 'parentName',
  iso = 'ISO3',
  MR_unsafe_water = 'latest_value'
)

mortality_rate_unsafe_water_indicators <- mortality_rate_unsafe_water_indicators %>%
  distinct()

mortality_rate_unsafe_water_indicators
# A tibble: 183 × 5
    code country                          region           iso   MR_unsafe_water
   <dbl> <chr>                            <chr>            <chr>           <dbl>
 1   583 Micronesia (Federated States of) Oceania (exc. A… FSM               3.6
 2   496 Mongolia                         Eastern Asia     MNG               1.3
 3   499 Montenegro                       Southern Europe  MNE               0  
 4   504 Morocco                          Northern Africa  MAR               1.9
 5   508 Mozambique                       Eastern Africa   MOZ              27.6
 6   104 Myanmar                          South-Eastern A… MMR              12.6
 7   516 Namibia                          Southern Africa  NAM              18.3
 8   524 Nepal                            Southern Asia (… NPL              19.8
 9   528 Netherlands                      Western Europe   NLD               0.2
10   554 New Zealand                      Australia and N… NZL               0.1
# ℹ 173 more rows
proportion_of_safe_water_indicators <- dplyr::select(
  proportion_of_safe_water_indicators,
  code = 'geoAreaCode',
  country = 'geoAreaName',
  region = 'parentName',
  iso = 'ISO3',
  urbanisation = "location_desc",
  sanitation_access = "value_2019",
)

# Remove duplicates
proportion_of_safe_water_indicators <- proportion_of_safe_water_indicators%>%
  distinct()

proportion_of_safe_water_indicators
# A tibble: 286 × 6
    code country                     region iso   urbanisation sanitation_access
   <dbl> <chr>                       <chr>  <chr> <chr>                    <dbl>
 1   807 North Macedonia             South… MKD   Rural                       66
 2   804 Ukraine                     Easte… UKR   Urban                       89
 3   807 North Macedonia             South… MKD   Urban                       85
 4   826 United Kingdom of Great Br… North… GBR   All areas                  100
 5   580 Northern Mariana Islands    Ocean… MNP   All areas                   91
 6   840 United States of America    North… USA   All areas                   97
 7   578 Norway                      North… NOR   All areas                   99
 8   512 Oman                        Weste… OMN   All areas                   90
 9   586 Pakistan                    South… PAK   All areas                   36
10   586 Pakistan                    South… PAK   Rural                       33
# ℹ 276 more rows
mortality_rate_unintentional_poisoning_indicators <- dplyr::select(
  mortality_rate_unintentional_poisoning_indicators,
  code = 'geoAreaCode',
  country = 'geoAreaName',
  region = 'parentName',
  iso = 'ISO3',
  gender = 'sex_desc',
  MR_poisoning = 'value_2019'
)

# drop rows if contains at least one NA value
mortality_rate_unintentional_poisoning_indicators <- na.omit(mortality_rate_unintentional_poisoning_indicators)

# Remove duplicates
mortality_rate_unintentional_poisoning_indicators <- mortality_rate_unintentional_poisoning_indicators%>%
  distinct()

#check for empty row
any(is.na(mortality_rate_unintentional_poisoning_indicators))
[1] FALSE
mortality_rate_unintentional_poisoning_indicators
# A tibble: 549 × 6
    code country     region                          iso   gender   MR_poisoning
   <dbl> <chr>       <chr>                           <chr> <chr>           <dbl>
 1   470 Malta       Southern Europe                 MLT   Both se…          0.1
 2   152 Chile       South America                   CHL   Male              0.5
 3     4 Afghanistan Southern Asia (excluding India) AFG   Both se…          1  
 4   470 Malta       Southern Europe                 MLT   Female            0  
 5   156 China       Eastern Asia                    CHN   Both se…          1.8
 6   470 Malta       Southern Europe                 MLT   Male              0.2
 7   156 China       Eastern Asia                    CHN   Female            1.5
 8   478 Mauritania  Western Africa                  MRT   Both se…          1.5
 9   478 Mauritania  Western Africa                  MRT   Female            1.3
10   478 Mauritania  Western Africa                  MRT   Male              1.7
# ℹ 539 more rows
population_with_basic_handwashing_facilities_indicators <- dplyr::select(
  population_with_basic_handwashing_facilities_indicators,
  code = 'geoAreaCode',
  country = 'geoAreaName',
  region = 'parentName',
  iso = 'ISO3',
  urbanisation = 'location_desc',
  handwash_access = 'value_2019'
)

# drop rows if contains at least one NA value
population_with_basic_handwashing_facilities_indicators <- na.omit(population_with_basic_handwashing_facilities_indicators)

# Remove duplicates
population_with_basic_handwashing_facilities_indicators <- population_with_basic_handwashing_facilities_indicators%>%
  distinct()

#check for empty row
any(is.na(mortality_rate_unintentional_poisoning_indicators))
[1] FALSE
population_with_basic_handwashing_facilities_indicators
# A tibble: 250 × 6
    code country      region                  iso   urbanisation handwash_access
   <dbl> <chr>        <chr>                   <chr> <chr>                  <dbl>
 1     4 Afghanistan  Southern Asia (excludi… AFG   All areas                 38
 2   356 India        Southern Asia           IND   Urban                     82
 3   360 Indonesia    South-Eastern Asia      IDN   All areas                 94
 4     4 Afghanistan  Southern Asia (excludi… AFG   Rural                     29
 5   694 Sierra Leone Western Africa          SLE   Rural                     18
 6   360 Indonesia    South-Eastern Asia      IDN   Rural                     91
 7     4 Afghanistan  Southern Asia (excludi… AFG   Urban                     64
 8    12 Algeria      Northern Africa         DZA   All areas                 85
 9    12 Algeria      Northern Africa         DZA   Rural                     75
10    12 Algeria      Northern Africa         DZA   Urban                     88
# ℹ 240 more rows

9.1 Joining all 4 indicators

countries_tb <- left_join(
  mortality_rate_unintentional_poisoning_indicators,
  population_with_basic_handwashing_facilities_indicators,
  by = join_by(country, iso)
)
  
countries_tb <- left_join(
  countries_tb,
  mortality_rate_unsafe_water_indicators,
  by = join_by(iso)
) 
  
countries_tb <- left_join(
  countries_tb,
  proportion_of_safe_water_indicators,
  by = join_by(iso, urbanisation)
) |>  
  mutate(
    iso = case_match(
      iso,
      "COD" ~ "ZAR",
      "ROU" ~ "ROM",
      "TLS" ~ "TMP",
      "XKX" ~ "KSV",
      .default = iso
    )
  )

names(countries_tb)
 [1] "code.x"            "country.x"         "region.x"         
 [4] "iso"               "gender"            "MR_poisoning"     
 [7] "code.y"            "region.y"          "urbanisation"     
[10] "handwash_access"   "code.x.x"          "country.y"        
[13] "region.x.x"        "MR_unsafe_water"   "code.y.y"         
[16] "country"           "region.y.y"        "sanitation_access"

9.2 Joining with adminGeoJson

countries_sf <- read_sf("./data/WB_countries_Admin0.geojson")
countries_tb <- left_join(
  countries_tb,
  dplyr::select(countries_sf,WB_A3, GDP_MD_EST, INCOME_GRP, POP_EST),
  by = c("iso" = "WB_A3")
)
Warning in left_join(countries_tb, dplyr::select(countries_sf, WB_A3, GDP_MD_EST, : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 325 of `x` matches multiple rows in `y`.
ℹ Row 126 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
countries_tb
# A tibble: 1,038 × 22
   code.x country.x   region.x         iso   gender MR_poisoning code.y region.y
    <dbl> <chr>       <chr>            <chr> <chr>         <dbl>  <dbl> <chr>   
 1    470 Malta       Southern Europe  MLT   Both …          0.1     NA <NA>    
 2    152 Chile       South America    CHL   Male            0.5     NA <NA>    
 3      4 Afghanistan Southern Asia (… AFG   Both …          1        4 Souther…
 4      4 Afghanistan Southern Asia (… AFG   Both …          1        4 Souther…
 5      4 Afghanistan Southern Asia (… AFG   Both …          1        4 Souther…
 6    470 Malta       Southern Europe  MLT   Female          0       NA <NA>    
 7    156 China       Eastern Asia     CHN   Both …          1.8     NA <NA>    
 8    470 Malta       Southern Europe  MLT   Male            0.2     NA <NA>    
 9    156 China       Eastern Asia     CHN   Female          1.5     NA <NA>    
10    478 Mauritania  Western Africa   MRT   Both …          1.5    478 Western…
# ℹ 1,028 more rows
# ℹ 14 more variables: urbanisation <chr>, handwash_access <dbl>,
#   code.x.x <dbl>, country.y <chr>, region.x.x <chr>, MR_unsafe_water <dbl>,
#   code.y.y <dbl>, country <chr>, region.y.y <chr>, sanitation_access <dbl>,
#   GDP_MD_EST <dbl>, INCOME_GRP <chr>, POP_EST <int>,
#   geometry <MULTIPOLYGON [°]>

9.3 Clean data

Remove certain columns and rename column for cleaner a look

# Drop
clean_datatable <- dplyr::select(countries_tb, 
                          -code.y, -code.x,
                          -region.y, -region.x,
                          -code.x.x, code.y.y, 
                          -country.x, -country.y,
                          -region.x.x, -region.y.y, 
                          -iso)  
# Rename
clean_datatable <- clean_datatable %>% rename(
  gdp = GDP_MD_EST,
  income_group = INCOME_GRP,
  population = POP_EST)

# Rearrange
clean_datatable <- clean_datatable %>%
  dplyr::select(country,
         gdp,
         income_group,
         population,
         urbanisation, 
         gender, 
         handwash_access,  
         sanitation_access, 
         MR_poisoning, 
         MR_unsafe_water,)


datatable(clean_datatable)

9.4 Correlation between 4 indicators

# filter gender of both sexes to reduce duplicates
countries_tb <- countries_tb |>
  filter(gender == "Both sexes") |>
  na.omit(countries_tb)
countries_tb
# A tibble: 143 × 22
   code.x country.x   region.x         iso   gender MR_poisoning code.y region.y
    <dbl> <chr>       <chr>            <chr> <chr>         <dbl>  <dbl> <chr>   
 1      4 Afghanistan Southern Asia (… AFG   Both …          1        4 Souther…
 2      4 Afghanistan Southern Asia (… AFG   Both …          1        4 Souther…
 3      4 Afghanistan Southern Asia (… AFG   Both …          1        4 Souther…
 4    484 Mexico      Central America  MEX   Both …          0.4    484 Central…
 5    496 Mongolia    Eastern Asia     MNG   Both …          2.8    496 Eastern…
 6    496 Mongolia    Eastern Asia     MNG   Both …          2.8    496 Eastern…
 7    496 Mongolia    Eastern Asia     MNG   Both …          2.8    496 Eastern…
 8    499 Montenegro  Southern Europe  MNE   Both …          0.6    499 Souther…
 9    499 Montenegro  Southern Europe  MNE   Both …          0.6    499 Souther…
10    104 Myanmar     South-Eastern A… MMR   Both …          1.3    104 South-E…
# ℹ 133 more rows
# ℹ 14 more variables: urbanisation <chr>, handwash_access <dbl>,
#   code.x.x <dbl>, country.y <chr>, region.x.x <chr>, MR_unsafe_water <dbl>,
#   code.y.y <dbl>, country <chr>, region.y.y <chr>, sanitation_access <dbl>,
#   GDP_MD_EST <dbl>, INCOME_GRP <chr>, POP_EST <int>,
#   geometry <MULTIPOLYGON [°]>
countries_tb_subset <- dplyr::select(countries_tb, MR_poisoning, MR_unsafe_water, handwash_access, sanitation_access,urbanisation,country.x)

# Display the subsetted data
print(countries_tb_subset)
# A tibble: 143 × 6
   MR_poisoning MR_unsafe_water handwash_access sanitation_access urbanisation
          <dbl>           <dbl>           <dbl>             <dbl> <chr>       
 1          1              13.9              38                27 All areas   
 2          1              13.9              29                24 Rural       
 3          1              13.9              64                36 Urban       
 4          0.4             1.1              90                43 All areas   
 5          2.8             1.3              84                30 All areas   
 6          2.8             1.3              77                11 Rural       
 7          2.8             1.3              88                38 Urban       
 8          0.6             0                99                85 All areas   
 9          0.6             0                99                87 Urban       
10          1.3            12.6              74                58 All areas   
# ℹ 133 more rows
# ℹ 1 more variable: country.x <chr>

9.5 Scatterplot matrix

#set axis names
countries_tb_subset_corr4 <- dplyr::select(countries_tb_subset, MR_unsafe_water, MR_poisoning, handwash_access, sanitation_access, urbanisation,country.x)
colnames(countries_tb_subset_corr4) <- c("Unsafe Water Mortality Rate",
                                         "Poisoning Mortality",
                                         "Handwashing Access (%)",
                                         "Sanitation Access (%)",
                                         "Urbanisation",
                                         "Countries"
                                         )
countries_tb_subset_corr4
# A tibble: 143 × 6
   `Unsafe Water Mortality Rate` `Poisoning Mortality` `Handwashing Access (%)`
                           <dbl>                 <dbl>                    <dbl>
 1                          13.9                   1                         38
 2                          13.9                   1                         29
 3                          13.9                   1                         64
 4                           1.1                   0.4                       90
 5                           1.3                   2.8                       84
 6                           1.3                   2.8                       77
 7                           1.3                   2.8                       88
 8                           0                     0.6                       99
 9                           0                     0.6                       99
10                          12.6                   1.3                       74
# ℹ 133 more rows
# ℹ 3 more variables: `Sanitation Access (%)` <dbl>, Urbanisation <chr>,
#   Countries <chr>
#change to factor for colour

countries_tb_subset_corr4$Urbanisation <- as.factor(countries_tb_subset_corr4$Urbanisation)

plot <- ggpairs(data = countries_tb_subset_corr4,
                columns = 1:4,
                upper = list(continuous = "cor"),
                lower = list(continuous = "smooth", se=FALSE),
                diag = list(continuous = "bar", bin=30),
                mapping = aes(color = Urbanisation),
                tooltips = c("Countries")) +
  theme_minimal() +
  theme(axis.title = element_text(size = 5))
Warning in warn_if_args_exist(list(...)): Extra arguments: "tooltips" are being
ignored.  If these are meant to be aesthetics, submit them using the 'mapping'
variable within ggpairs with ggplot2::aes or ggplot2::aes_string.
Warning in check_and_set_ggpairs_defaults("diag", diag, continuous =
"densityDiag", : Changing diag$continuous from 'bar' to 'barDiag'
plot
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Convert ggplot to plotly
plot_interactive <- ggplotly(plot)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Can only have one: highlight
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Can only have one: highlight
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Can only have one: highlight
plot_interactive <- layout(plot_interactive,
       font = list(size = 8)
)

# Render the plotly scatterplot matrix
plot_interactive

9.6 Different Scatter plot matrix

countries_tb_subset_corr4 <- dplyr::select(countries_tb_subset, MR_unsafe_water, MR_poisoning, handwash_access, sanitation_access, urbanisation)

# Rename the columns
colnames(countries_tb_subset_corr4) <- c("Unsafe Water Mortality Rate", 
                                         "Poisoning Mortality",
                                         "Handwashing Access (%)", 
                                         "Sanitation Access (%)",
                                         "Urbanisation")

# Make urbanisation as a factor for colourisation
countries_tb_subset_corr4$Urbanisation <- as.factor(countries_tb_subset_corr4$Urbanisation)

# Map urbanisation to colors
color_mapping <- c("All areas" = "red", "Urban" = "blue", "Rural" = "green") # Adjust to your actual factor levels and desired colors
colors <- color_mapping[countries_tb_subset_corr4$Urbanisation]

# Create scatterplot matrix without colors
plot <- pairs.panels(countries_tb_subset_corr4[, 1:4],
                     hist.col = "#00AFBB",
                     bg = "transparent")